{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "data_preprocessing.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "IW6u46qHXPWw"
      },
      "source": [
        "# Environmental Sound Classification using Deep Learning\n",
        "## >> Data Preprocessing"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "VsGk0b8gURNP"
      },
      "source": [
        "You may need to install librosa using pip as follows:\n",
        "\n",
        "> **!pip install librosa==0.8.0**\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6JizWUZGNS2q"
      },
      "source": [
        "import os\n",
        "\n",
        "import librosa\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "\n",
        "from tqdm import tqdm"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "oOPAYJmbUzRS"
      },
      "source": [
        "---"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zhbAwDJgZsCU"
      },
      "source": [
        "## 0. Download and extract audio data\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "JdQNB-r7XgFF",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e3c4d503-c984-49a8-b2ef-0c1ed3c6b2bb"
      },
      "source": [
        "# Prepare the working directory: optionally mount Google Drive, then create\n",
        "# and enter the dataset root folder. Safe to re-run from any directory.\n",
        "USE_GOOGLE_COLAB = True\n",
        "ROOT_FOLDER_NAME = 'US8K'\n",
        "\n",
        "if USE_GOOGLE_COLAB:\n",
        "    # google.colab is only importable on Colab, hence the conditional import\n",
        "    from google.colab import drive\n",
        "    drive.mount('/content/gdrive')\n",
        "\n",
        "    # absolute path: a relative one fails when the cell is re-run after\n",
        "    # the working directory has already been changed\n",
        "    %cd /content/gdrive/'My Drive'\n",
        "\n",
        "# exist_ok avoids the error a plain mkdir raises when the folder is already there\n",
        "os.makedirs(ROOT_FOLDER_NAME, exist_ok=True)\n",
        "%cd $ROOT_FOLDER_NAME"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Mounted at /content/gdrive\n",
            "/content/gdrive/My Drive\n",
            "/content/gdrive/My Drive/US8K\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "duUqS0jbdZkf"
      },
      "source": [
        "# Download the dataset archive into the working directory and unpack it.\n",
        "DOWNLOAD_DATASET = True\n",
        "EXTRACT_DATASET = True\n",
        "\n",
        "DATASET_URL = \"https://goo.gl/8hY5ER\"\n",
        "# wget stores the download under the last URL path segment, so derive the\n",
        "# archive name from the URL instead of hard-coding it a second time\n",
        "DATASET_ARCHIVE = os.path.basename(DATASET_URL)\n",
        "\n",
        "# skip the download when the archive is already present: re-running wget would\n",
        "# save a second copy under a different name while tar keeps reading the old one\n",
        "# (delete the archive file to force a fresh download)\n",
        "if DOWNLOAD_DATASET and not os.path.exists(DATASET_ARCHIVE):\n",
        "    !wget $DATASET_URL\n",
        "\n",
        "if EXTRACT_DATASET:\n",
        "    !tar xf $DATASET_ARCHIVE"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "vM0oDr37iE4M"
      },
      "source": [
        "---"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "pgZ8e_g4h1VC"
      },
      "source": [
        "# absolute locations of the UrbanSound8K audio folds and of the metadata csv,\n",
        "# resolved against the current working directory\n",
        "US8K_AUDIO_PATH = os.path.abspath(os.path.join('UrbanSound8K', 'audio'))\n",
        "US8K_METADATA_PATH = os.path.abspath(\n",
        "    os.path.join('UrbanSound8K', 'metadata', 'UrbanSound8K.csv'))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "L0Y7DDnxXrZX",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 424
        },
        "outputId": "0bb7ad7a-11e9-405f-f167-9a67c667e0e8"
      },
      "source": [
        "# read the metadata csv, keeping only the file name, fold and class id columns;\n",
        "# fold and classID are stored as unsigned 8-bit integers\n",
        "us8k_metadata_df = pd.read_csv(\n",
        "    US8K_METADATA_PATH,\n",
        "    usecols=[\"slice_file_name\", \"fold\", \"classID\"],\n",
        "    dtype={\"fold\": \"uint8\", \"classID\": \"uint8\"},\n",
        ")\n",
        "\n",
        "us8k_metadata_df"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>slice_file_name</th>\n",
              "      <th>fold</th>\n",
              "      <th>classID</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>100032-3-0-0.wav</td>\n",
              "      <td>5</td>\n",
              "      <td>3</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>100263-2-0-117.wav</td>\n",
              "      <td>5</td>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>100263-2-0-121.wav</td>\n",
              "      <td>5</td>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>100263-2-0-126.wav</td>\n",
              "      <td>5</td>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>100263-2-0-137.wav</td>\n",
              "      <td>5</td>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8727</th>\n",
              "      <td>99812-1-2-0.wav</td>\n",
              "      <td>7</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8728</th>\n",
              "      <td>99812-1-3-0.wav</td>\n",
              "      <td>7</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8729</th>\n",
              "      <td>99812-1-4-0.wav</td>\n",
              "      <td>7</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8730</th>\n",
              "      <td>99812-1-5-0.wav</td>\n",
              "      <td>7</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8731</th>\n",
              "      <td>99812-1-6-0.wav</td>\n",
              "      <td>7</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>8732 rows × 3 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "         slice_file_name  fold  classID\n",
              "0       100032-3-0-0.wav     5        3\n",
              "1     100263-2-0-117.wav     5        2\n",
              "2     100263-2-0-121.wav     5        2\n",
              "3     100263-2-0-126.wav     5        2\n",
              "4     100263-2-0-137.wav     5        2\n",
              "...                  ...   ...      ...\n",
              "8727     99812-1-2-0.wav     7        1\n",
              "8728     99812-1-3-0.wav     7        1\n",
              "8729     99812-1-4-0.wav     7        1\n",
              "8730     99812-1-5-0.wav     7        1\n",
              "8731     99812-1-6-0.wav     7        1\n",
              "\n",
              "[8732 rows x 3 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 22
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zm96JrdyUv4C"
      },
      "source": [
        "---"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "AcBXauxHhwUS"
      },
      "source": [
        "## 1. Feature Extraction\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "e-LlBsWDU-vz"
      },
      "source": [
        "###### Extract a log-mel spectrogram for each audio file in the dataset and store it into a Pandas DataFrame along with its class and fold label."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mGOr9ZEseJF0"
      },
      "source": [
        "HOP_LENGTH = 512        # hop between successive frames, in samples\n",
        "WINDOW_LENGTH = 512     # analysis window length, in samples\n",
        "N_MEL = 128             # number of Mel bands to generate\n",
        "\n",
        "\n",
        "def compute_melspectrogram_with_fixed_length(audio, sampling_rate, num_of_samples=128):\n",
        "    \"\"\"Return a log-mel spectrogram whose axis 1 has exactly `num_of_samples` entries.\n",
        "\n",
        "    A mel-scaled power spectrogram of `audio` is computed with HOP_LENGTH,\n",
        "    WINDOW_LENGTH and N_MEL, converted to decibels relative to its own\n",
        "    maximum, and then brought to the requested length along axis 1 with\n",
        "    `librosa.util.fix_length` (pad value -80.0).\n",
        "\n",
        "    Args:\n",
        "        audio: audio signal, forwarded to librosa as `y`.\n",
        "        sampling_rate: sampling rate of `audio`, forwarded to librosa as `sr`.\n",
        "        num_of_samples: required size of axis 1 of the returned spectrogram.\n",
        "\n",
        "    Returns:\n",
        "        The fixed-length log-mel spectrogram, or None when any step raised\n",
        "        an exception (the error is printed instead of being re-raised).\n",
        "    \"\"\"\n",
        "    try:\n",
        "        mel_power = librosa.feature.melspectrogram(\n",
        "            y=audio, sr=sampling_rate, hop_length=HOP_LENGTH,\n",
        "            win_length=WINDOW_LENGTH, n_mels=N_MEL)\n",
        "\n",
        "        # power -> decibels, referenced to the largest value of this spectrogram\n",
        "        log_mel = librosa.power_to_db(mel_power, ref=np.max)\n",
        "\n",
        "        # already the requested length: nothing to adjust\n",
        "        if log_mel.shape[1] == num_of_samples:\n",
        "            return log_mel\n",
        "\n",
        "        # otherwise let librosa fix the length along axis 1, padding with -80.0\n",
        "        return librosa.util.fix_length(log_mel, size=num_of_samples, axis=1,\n",
        "                                       constant_values=(0, -80.0))\n",
        "    except Exception as err:\n",
        "        print(\"\\nError encountered while parsing files\\n>>\", err)\n",
        "        return None"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ei7J4d-tdjs6",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "fd769ecd-d6d7-42cd-fec9-0aefde983fd0"
      },
      "source": [
        "SOUND_DURATION = 2.95   # seconds of audio loaded per clip (librosa.load `duration`)\n",
        "\n",
        "features = []\n",
        "\n",
        "# build one [log-mel spectrogram, class label, fold] record per metadata row\n",
        "for index, row in tqdm(us8k_metadata_df.iterrows(), total=len(us8k_metadata_df)):\n",
        "    fold, label = row[\"fold\"], row[\"classID\"]\n",
        "\n",
        "    # audio files are laid out as <audio root>/fold<N>/<slice file name>\n",
        "    file_path = f'{US8K_AUDIO_PATH}/fold{fold}/{row[\"slice_file_name\"]}'\n",
        "    audio, sample_rate = librosa.load(file_path, duration=SOUND_DURATION, res_type='kaiser_fast')\n",
        "\n",
        "    # the spectrogram is None when feature extraction failed for this file\n",
        "    features.append([compute_melspectrogram_with_fixed_length(audio, sample_rate), label, fold])\n",
        "\n",
        "# gather all records into a single Pandas DataFrame\n",
        "us8k_df = pd.DataFrame(features, columns=[\"melspectrogram\", \"label\", \"fold\"])"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            " 41%|████      | 3554/8732 [03:44<05:24, 15.95it/s]/usr/local/lib/python3.6/dist-packages/librosa/core/spectrum.py:224: UserWarning: n_fft=2048 is too small for input signal of length=1323\n",
            "  n_fft, y.shape[-1]\n",
            " 95%|█████████▌| 8324/8732 [08:39<00:20, 20.33it/s]/usr/local/lib/python3.6/dist-packages/librosa/core/spectrum.py:224: UserWarning: n_fft=2048 is too small for input signal of length=1103\n",
            "  n_fft, y.shape[-1]\n",
            "/usr/local/lib/python3.6/dist-packages/librosa/core/spectrum.py:224: UserWarning: n_fft=2048 is too small for input signal of length=1523\n",
            "  n_fft, y.shape[-1]\n",
            "100%|██████████| 8732/8732 [09:03<00:00, 16.06it/s]\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "uVEFwKqoiI4P"
      },
      "source": [
        "---"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "1zgmwIW1p64C"
      },
      "source": [
        "### Store the data"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9OVKTKBlnKb8"
      },
      "source": [
        "# write the Pandas DataFrame object to .pkl file\n",
        "WRITE_DATA = True\n",
        "\n",
        "if WRITE_DATA:\n",
        "  us8k_df.to_pickle(\"us8k_df.pkl\")"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}